* Author: Joe Tatarka
* Name: 3_1_gws_build.do
* Purpose: Combine raw GWS data into a panel of users
* Background: the GWS download was split into two parts, a panel of users and the app events themselves.
* This file cleans the panel of users. The cleaned panel is then used to count the total number of
* days that users are in the panel in each month, which accounts for changes in the panel over time.

* Set Global File Paths

* Project root; every dataset and exhibit folder below hangs off of it.
global root "T:/service_industries/replication_package"

* Dataset folders (raw, intermediate, built) and the exhibits folder.
global raw_root          "${root}/datasets/raw"
global intermediate_root "${root}/datasets/intermediate"
global built_root        "${root}/datasets/built"
global exhibits_root     "${root}/exhibits"

* [INSERT RAW GWS DATA PATH HERE]
global gws_root "T:/generally_useful_datasets/gws_06_17_24_pull/clean"


************************************************************
******* 1. Loop through raw panel files and clean them
*************************************************************
* Clean every raw panel CSV and store each one in its own tempfile
* (panel_1, panel_2, ...). The local i is the running file counter; after
* the loop the local k holds the number of files saved, which Section 2
* uses to append them together.
local i = 1
local files : dir "${gws_root}/panel/" files "*.csv"
quietly foreach file in `files' {

	import delimited "${gws_root}/panel/`file'", clear

	* v1 is the name import delimited gives to a first column without a
	* usable header (presumably an exported row index; confirm against the
	* raw files).
	drop v1

	* Keep the 7 characters of the raw id that start at position 8
	* (presumably this strips a fixed-length prefix; confirm).
	replace panelistid = substr(panelistid, 8, 7)
	compress panelistid

	* Build a single Stata daily date from the three component columns.
	gen date = mdy(month, day, year)
	format date %td
	drop month day year

	* Recode gender to a labelled 0/1 byte: 1 for "Male", missing when the
	* raw value is empty, and 0 for every other non-empty value.
	gen byte gender_test = (gender == "Male") if gender != ""
	drop gender
	rename gender_test gender
	* The replace option keeps this from failing on later iterations if the
	* label definition is still in memory from an earlier file.
	label define gender 0 "Female" 1 "Male", replace
	label values gender gender

	* Recode ethnicity to a labelled 1-4 byte; any value not listed here
	* is left missing.
	gen byte ethnicity_test = .
	replace ethnicity_test = 1 if ethnicity == "White"
	replace ethnicity_test = 2 if ethnicity == "Black"
	replace ethnicity_test = 3 if ethnicity == "Hispanic"
	replace ethnicity_test = 4 if ethnicity == "Asian-Islander-Alaska-Mixed-Other"
	drop ethnicity
	rename ethnicity_test ethnicity
	label define ethnicity 1 "White" 2 "Black" 3 "Hispanic" 4 "Asian+Other", replace
	label values ethnicity ethnicity

	* Shorten the one long market name so the string variable compresses.
	replace intendedmarket = "Fort Myers" if intendedmarket == "Fort Myers - Cape Coral - Sarasota - Bradenton"
	compress intendedmarket

	tempfile panel_`i'
	save `panel_`i'', replace

	local i = `i' + 1

}
local k = `i' - 1

* Stop with a clear message when no CSV was found; otherwise Section 2 would
* fail on an empty tempfile name with a much less helpful error.
if `k' == 0 {
	display as error "No panel CSV files found in ${gws_root}/panel/"
	exit 601
}

*****************************************************************
*** 2. Append Cleaned Files Together and Collapse
*****************************************************************
* Stack the cleaned per-file tempfiles (panel_1 through panel_k) into one
* dataset. With a single file the loop body simply never runs.
use `panel_1', clear
quietly forval j = 2/`k' {
	* force lets the append proceed when a variable is string in one file
	* and numeric in another. NOTE(review): the mismatched values from the
	* appended file then become missing, so confirm no needed variable is
	* affected.
	append using `panel_`j'', force
	* Free the disk space now instead of waiting for Stata to remove the
	* tempfile when the do-file ends.
	erase `panel_`j''
}

**** We want the total number of panelist-days in each month so that we can compute aggregate app usage per day in a given month. This accounts for changes in the panel; a panelist is not guaranteed to be in the panel on every day of the month.
**** Each row is treated as one panelist-day. NOTE(review): duplicate panelist-date rows, if any exist, are counted more than once; confirm against the raw files.
gen year_month_gs = ym(year(date), month(date))
format year_month_gs %tm

* One collapse by month gives the same totals as first summing within
* panelist-month and then across panelists, so the intermediate step and the
* preceding sort are not needed.
gen panel_days = 1
collapse (sum) panel_days, by(year_month_gs)

label var panel_days "Total Panelist Days in Month"
label var year_month_gs "Year-month"

********************************************************************
*** 3. Save Intermediate Dataset
********************************************************************
* Make sure the output folder exists before saving; capture ignores the error
* mkdir raises when the folder is already there. Then write the monthly
* panelist-day totals.
capture mkdir "${intermediate_root}/3_gws_build"
save "${intermediate_root}/3_gws_build/3_1_gws_panel.dta", replace





